pacman::p_load(tidyverse, data.table, broom, stringi, raster, sf, geosphere)
# Remove every object listed by ls() so the script starts from an empty
# workspace. NOTE(review): when the script is source()d into an interactive
# session this also deletes the caller's own objects; running it in a fresh
# R process would make this line unnecessary.
rm(list=ls())
################################################################################ 


########################### Centroids from shapefile data

#Shapefiles
# Read the cleaned Argentina, Brazil and world-country layers and compute one
# centroid per feature. The world country names are also written out as a CSV.
shpfolder <- c("../../data/shapefiles/clean/")

# Argentina: county polygons -> county centroids.
country <- 'argentina'
shpfile_ar <- st_read(dsn = path.expand(shpfolder), layer = paste0(country, '_all'), quiet = TRUE)
# st_centroid() on an sf object returns one centroid per feature and keeps the
# attribute columns (cty_id is read from the result further down). The former
# `byid` / `id = x@data$...` arguments were sp/rgeos leftovers: sf objects have
# no @data slot and the arguments were silently ignored.
centroid_a <- st_centroid(shpfile_ar)

# Brazil: same layer naming scheme as Argentina.
country <- 'brazil'
shpfile_br <- st_read(dsn = path.expand(shpfolder), layer = paste0(country, '_all'), quiet = TRUE)
# Drop the layer's CRS before taking centroids; the centroids are assigned the
# world layer's CRS later, right before distances to countries are computed.
shpfile_br <- st_set_crs(shpfile_br, st_crs(NA))
centroid_b <- st_centroid(shpfile_br)

# World countries: the layer name has no '_all' suffix.
country <- 'world_country'
shpfile_w <- st_read(dsn = path.expand(shpfolder), layer = country, quiet = TRUE)
centroid_w <- st_centroid(shpfile_w)
# Use ASCII-transliterated country names as row names; they become the
# destination labels of the distance tables built below.
row.names(centroid_w) <- stri_trans_general(str = centroid_w$ctry, id = "Latin-ASCII")
countries_list <- data.table(row.names(centroid_w))
write.csv(countries_list, paste0(shpfolder,'world_countries.csv'), row.names = FALSE)

#Distance matrix - county to nation

# Build a long-format table (origin, destination, distance_km) holding the
# distance from every feature in `origins` to every feature in `destinations`.
#   origins          sf point layer with a `cty_id` column (used as `origin`)
#   destinations     sf point layer; st_distance() needs it in the same CRS
#   destination_ids  one label per feature of `destinations`
# Returns a data.table ordered destination by destination (all origins for the
# first destination, then all origins for the second, ...).
centroid_distances_long <- function(origins, destinations, destination_ids) {
  # Same conversion chain as before (data.table -> matrix -> data.table);
  # presumably there to end up with plain numeric columns - TODO confirm.
  wide <- data.table(as.matrix(data.table(st_distance(origins, destinations))))
  setnames(wide, as.character(destination_ids))
  set(wide, j = "origin", value = origins$cty_id)
  setcolorder(wide, "origin")
  # data.table::melt replaces the deprecated tidyr::gather_(); it stacks the
  # destination columns in the same column-major order and, with
  # variable.factor = FALSE, keeps `destination` as character.
  long <- data.table::melt(
    wide,
    id.vars = "origin",
    variable.name = "destination",
    value.name = "distance_km",
    variable.factor = FALSE
  )
  # Divide by 1000 to express the distance in km (assumes st_distance returned
  # metres - verify against the layers' CRS).
  long[, distance_km := distance_km / 1000]
  long
}

# Argentina: distances to every country except Argentina itself.
dm_a <- centroid_distances_long(centroid_a, centroid_w, row.names(centroid_w))[destination != 'Argentina', ]

# Brazil: the centroids carry no CRS at this point, so give them the world
# layer's CRS before measuring distances, then drop Brazil as a destination.
centroid_b <- st_set_crs(centroid_b, st_crs(centroid_w))
dm_b <- centroid_distances_long(centroid_b, centroid_w, row.names(centroid_w))[destination != 'Brazil', ]

dm <- rbind(dm_a, dm_b)

#Combine with destination continents and destination blocs
# Attach the world-country crosswalk (keyed on `destination`) and then the
# trade country codes (keyed on `country`) to the county-to-nation distances.
dfx <- fread("../../data/shapefiles/raw/world_countries_crosswalk.csv")
dfy <- fread("../../data/trade/clean/country_codes.csv.gz")

# all.y = TRUE keeps every row of the country-code table, including countries
# that have no matching distance rows.
df <- merge(
  merge(dm, dfx, by = 'destination'),
  dfy,
  by = 'country',
  all.y = TRUE
)

# Keep only the identifying columns plus the distance, under their output names.
df_dist <- df[, .(
  origin_county = origin,
  destination_country = country,
  destination_continent = continent,
  distance_km
)]
write.csv(
  df_dist,
  gzfile(paste0(shpfolder,'distance_county_country.csv.gz')),
  row.names = FALSE
)

#Aggregate to AMC-country level
# County -> AMC lookup. For Argentina the AMC id is set equal to the county id;
# for Brazil it is read from the geographic-units file.
df_a <- fread('../../data/landuse/clean/geographicunits_argentina.csv.gz')[, .(county_id)]
df_a[, amc_id := county_id]
df_b <- fread('../../data/landuse/clean/geographicunits_brazil.csv.gz')[, .(county_id, amc_id)]

df <- rbind(df_a, df_b)
setnames(df, old = 'county_id', new = 'origin_county')

# Attach the AMC id to each county-country distance, then average the
# distance over all counties that share an AMC and destination.
df <- merge(df_dist, df, by = 'origin_county')
df <- df[
  ,
  .(distance_km = mean(distance_km, na.rm = TRUE)),
  by = .(amc_id, destination_country, destination_continent)
]
write.csv(
  df,
  gzfile(paste0(shpfolder,'distance_amc_country.csv.gz')),
  row.names = FALSE
)



########################### Centroids of source counties

#Sub-national centroids
# Re-read the Argentina and Brazil county layers and compute one centroid per
# county for the county-to-county distance matrices below.
shpfile_a <- st_read(dsn = path.expand("../../data/shapefiles/clean/"), layer = 'argentina_all', quiet = TRUE)
# st_centroid() on an sf object works per feature and keeps cty_id; the old
# `byid` / `id = x@data$...` arguments were sp/rgeos leftovers that sf ignored
# (sf objects have no @data slot), so they are dropped.
centroid_a_sp <- st_centroid(shpfile_a)
# NOTE(review): the previous version then set centroid_a_sp's CRS to its own
# CRS, which changes nothing; removed. If another CRS was intended, set it here.

shpfile_b <- st_read(dsn = path.expand("../../data/shapefiles/clean/"), layer = 'brazil_all', quiet = TRUE)
# Drop Brazil's CRS before taking centroids, then give the centroids the same
# CRS as the Argentina centroids.
shpfile_b <- st_set_crs(shpfile_b, st_crs(NA))
centroid_b_sp <- st_centroid(shpfile_b)
centroid_b_sp <- st_set_crs(centroid_b_sp, st_crs(centroid_a_sp))

#Distance matrix - county to county - Argentina
# Pairwise centroid distances between all Argentina counties, written in long
# format with columns origin, destination, distance_km.
dm <- st_distance(centroid_a_sp, centroid_a_sp)
# Same conversion chain as before (data.table -> matrix -> data.table);
# presumably there to end up with plain numeric columns - TODO confirm.
dm <- data.table(as.matrix(data.table(dm)))
setnames(dm, as.character(centroid_a_sp$cty_id))
set(dm, j = "origin", value = centroid_a_sp$cty_id)
setcolorder(dm, "origin")
# data.table::melt replaces the deprecated tidyr::gather_(); it stacks the
# destination columns in the same column-major order and, with
# variable.factor = FALSE, keeps `destination` as character.
dm <- data.table::melt(
  dm,
  id.vars = "origin",
  variable.name = "destination",
  value.name = "distance_km",
  variable.factor = FALSE
)
# Divide by 1000 to express the distance in km (assumes st_distance returned
# metres - verify against the layer's CRS).
dm[, distance_km := distance_km / 1000]
write.csv(dm, gzfile(paste0(shpfolder,'distance_county_county_argentina.csv.gz')), row.names = FALSE)

#Distance matrix - county to county - Brazil
# Pairwise centroid distances between all Brazil counties, written in long
# format with columns origin, destination, distance_km.
dm <- st_distance(centroid_b_sp, centroid_b_sp)
# Same conversion chain as before (data.table -> matrix -> data.table);
# presumably there to end up with plain numeric columns - TODO confirm.
dm <- data.table(as.matrix(data.table(dm)))
setnames(dm, as.character(centroid_b_sp$cty_id))
set(dm, j = "origin", value = centroid_b_sp$cty_id)
setcolorder(dm, "origin")
# data.table::melt replaces the deprecated tidyr::gather_(); it stacks the
# destination columns in the same column-major order and, with
# variable.factor = FALSE, keeps `destination` as character.
dm <- data.table::melt(
  dm,
  id.vars = "origin",
  variable.name = "destination",
  value.name = "distance_km",
  variable.factor = FALSE
)
# Divide by 1000 to express the distance in km (assumes st_distance returned
# metres - verify against the layer's CRS).
dm[, distance_km := distance_km / 1000]
write.csv(dm, gzfile(paste0(shpfolder,'distance_county_county_brazil.csv.gz')), row.names = FALSE)




